Workflow

Single-cell transcriptomics of cells infected with influenza virions carrying barcodes. This experiment allows accurate detection of the number of unique virions infecting each cell and its resulting impact on the transcriptome. The single-cell transcriptomics were performed using 10x Chromium.

The basic steps in the analysis are as follows:

Click the nodes to obtain details about each step.

10x transcriptomics FASTQ files

Aligning 10x transcriptomics reads

Statistics

If the workflow was executed on a cluster or in the cloud, the reported runtimes include the time spent waiting in the queue.

Configuration

Configuration files
File Code
config.yaml
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# YAML configuration file for the analysis

# Upper limit on the number of CPUs used by any rule.
max_cpus: 16

# Output directories (paths as written, relative unless absolute).
# FASTQs & QC for the 10x transcriptomic runs
fastq10x_dir: 'results/fastq10x'
# `cellranger mkfastq` output
mkfastq10x_dir: 'results/fastq10x/mkfastq_output'
# location of downloaded genomes and annotations
genome_dir: 'results/genomes'
# STAR reference genome directory
refgenome: 'results/genomes/refgenome'
# aligned 10x transcriptomic reads
aligned_fastq10x_dir: 'results/aligned_fastq10x'
# The two directories below are commented out and therefore not set:
# viral_fastq10x_dir: results/viral_fastq10x  # viral tags / barcodes in 10X reads
# analysis_dir: results/analysis  # fine-grained analyses

# Cellular genome (FASTA) and GTF annotation, fetched from the Ensembl FTP
# site (release 98, Canis familiaris CanFam3.1 per the URLs below).
cell_genome_ftp: 'ftp://ftp.ensembl.org/pub/release-98/fasta/canis_familiaris/dna/Canis_familiaris.CanFam3.1.dna.toplevel.fa.gz'
cell_gtf_ftp: 'ftp://ftp.ensembl.org/pub/release-98/gtf/canis_familiaris/Canis_familiaris.CanFam3.1.98.gtf.gz'

# Viral genome (FASTA), GTF, and Genbank file locations.
viral_genome: 'data/flu_sequences/flu-CA09.fasta'
viral_gtf: 'data/flu_sequences/flu-CA09.gtf'
viral_genbank: 'data/flu_sequences/flu-CA09.gb'

# File giving nucleotide identities at viral tag sites (commented out, so
# this key is not set):
# viraltag_identities: data/flu_sequences/flu-CA09_viral_tags.yaml

# STAR alignment parameters for transcriptomics.
#
# These reduce the penalty for non-canonical splice sites. That is probably
# bad for mapping cellular reads, but it is good for mapping viral reads,
# which will have deletions that do not correspond to splice sites.
#
# All three gap scores are set to the same value.
# NOTE(review): the STAR manual appears to list defaults of -8 / -4 / -8 for
# scoreGapNoncan / scoreGapGCAG / scoreGapATAC, in which case only the first
# and last are actually changed here -- confirm against the STAR version used.
scoreGapNoncan: -4
scoreGapGCAG: -4
scoreGapATAC: -4

# 10X cell-barcode whitelist.
# URL the whitelist is obtained from: **this is for the v3 kit**
cb_whitelist_10x_url: 'https://github.com/10XGenomics/cellranger/raw/master/lib/python/cellranger/barcodes/3M-february-2018.txt.gz'
# path of the whitelist text file inside the results tree
cb_whitelist_10x: 'results/aligned_fastq10x/cb_whitelist_10x.txt'

# length of 10X cell barcode
cb_len_10x: 16
# length of 10X UMI: **this is for the v3 kit**
umi_len_10x: 12


#----------------------------------------------------------------------------
#                    configuration of experiments
#----------------------------------------------------------------------------
# Each experiment has a free-text `description`, an `expect_ncells` count and
# a `transcriptomics` mapping whose entries give an `index`, a `bcl_folder`
# and a `lane` (either a lane number or '*').
# NOTE(review): the unquoted ISO-date keys are read as date objects (not
# strings) by YAML 1.1 loaders such as PyYAML -- confirm the consumer
# expects that before quoting them.
experiments:

  hashing_wt_rapidpilot:
    description: >-
      Single-cell transcriptomics using a small-scale pilot rescue
      of the wildtype viral tag variant. Lab notes are at
      https://benchling.com/s/etr-Q28fCd1kprRNxAd0v5Hg
    expect_ncells: 2000
    transcriptomics:
      2019-12-03:
        index: 'SI-GA-A3'
        bcl_folder: '/shared/ngs/illumina/bloom_lab/191203_M03100_0504_000000000-CNCN9/'
        lane: '*'

  # Wildtype + dblSyn viral tag variants; two transcriptomics entries that
  # share one index.
  hashing_trial1:
    description: >-
      Single-cell transcriptomics using the wildtype and dblSyn viral
      tag variants. Infection volume was chosen based on HA expression
      measured by flow cytometry. Lab notes are at
      https://benchling.com/s/etr-i9I0yHiFb0P8wHCxosim
    expect_ncells: 2000
    transcriptomics:
      2020-01-16:
        index: 'SI-GA-A4'
        bcl_folder: '/shared/ngs/illumina/bloom_lab/200128_M03100_0528_000000000-CRC4G/'
        lane: '*'
      2020-02-18:
        index: 'SI-GA-A4'
        bcl_folder: '/shared/ngs/illumina/bloom_lab/200218_D00300_0910_AHCHHJBCX3/Raw/'
        lane: 2

  # Follow-up to `hashing_trial1` with larger inoculum volumes; three
  # transcriptomics entries that share one index.
  hashing_trial2:
    # Folded scalar: the lines below are joined with single spaces into one
    # string with no trailing newline.
    description: >-
      Single-cell transcriptomics using the wildtype and dblSyn viral
      tag variants. Infection volume was chosen based on the results
      of `hashing_trial1` and flow cytometry. The inoculum volume for
      the wildtype tag variant was about 12-fold higher, and for the
      dblSyn tag variant was about 24-fold higher.
    expect_ncells: 2000
    transcriptomics:
      2020-06-02:
        index: SI-GA-B3
        bcl_folder: /home/solexa/ngs/illumina/bloom_lab/200602_M03100_0578_000000000-J3464/
        lane: '*'
      2020-07-01:
        index: SI-GA-B3
        bcl_folder: /shared/ngs/illumina/agreaney/200701_D00300_0988_BHGFGMBCX3/raw/200701_D00300_0988_BHGFGMBCX3/
        lane: 2
      2020-08-26:
        index: SI-GA-B3
        bcl_folder: /shared/ngs/illumina/bloom_lab/bloom_lab/200826_D00300_1040_BHHGLWBCX3/raw/200826_D00300_1040_BHHGLWBCX3/
        lane: '*'

  # No description recorded for this experiment; two transcriptomics entries
  # that share one index.
  hashing_trial3_withNH4Cl:
    description: 'None provided.'
    expect_ncells: 2000
    transcriptomics:
      2020-07-24:
        index: 'SI-GA-B5'
        bcl_folder: '/shared/ngs/illumina/bloom_lab/200724_M03100_0593_000000000-J33YK/'
        lane: '*'
      2020-08-26:
        index: 'SI-GA-B5'
        bcl_folder: '/shared/ngs/illumina/bloom_lab/bloom_lab/200826_D00300_1040_BHHGLWBCX3/raw/200826_D00300_1040_BHHGLWBCX3/'
        lane: '*'

  # No description recorded for this experiment; a single transcriptomics
  # entry.
  hashing_trial3_noNH4Cl:
    description: 'None provided'
    expect_ncells: 2000
    transcriptomics:
      2020-07-24:
        index: 'SI-GA-B4'
        bcl_folder: '/shared/ngs/illumina/bloom_lab/200724_M03100_0593_000000000-J33YK/'
        lane: '*'

  # No description recorded for this experiment; a single transcriptomics
  # entry restricted to lane 2.
  scProgenyProduction_trial1:
    description: 'None provided.'
    expect_ncells: 2000
    transcriptomics:
      2020-09-25:
        index: 'SI-GA-B8'
        bcl_folder: '/shared/ngs/illumina/bloom_lab/200925_D00300_1065_AHHL7NBCX3/raw/200925_D00300_1065_AHHL7NBCX3/'
        lane: 2

  # No description recorded for this experiment; a single transcriptomics
  # entry with lane set to '*'.
  scProgenyProduction_trial2:
    description: 'None provided.'
    expect_ncells: 2000
    transcriptomics:
      2020-09-25:
        index: 'SI-GA-B9'
        bcl_folder: '/shared/ngs/illumina/bloom_lab/200925_D00300_1065_AHHL7NBCX3/raw/200925_D00300_1065_AHHL7NBCX3/'
        lane: '*'

Loading...